import matplotlib.pyplot as plt
from sklearn import datasets
import pandas as pd
import pandas as pd
import numpy as np
import seaborn as sns
sns.set(color_codes=True)
import warnings
from sklearn.impute import SimpleImputer
warnings.filterwarnings("ignore")
# Load the main COVID-19 dataset from a local CSV into `df` and print its
# (rows, columns) shape.
# The commented-out lines are a disabled Google Drive download of the same
# file; they use a `drive` object that is not defined in the visible code.
#link='https://drive.google.com/file/d/14aEtw3PE3kZ5fY1i5GrcKQKXVldYRGcS/view?usp=sharing'
#id = link.split("/")[-2]
#downloaded = drive.CreateFile({'id':id})
##downloaded.GetContentFile('owid-covid-data.csv')
df = pd.read_csv('owid-covid-data.csv')
print(df.shape)
(83211, 59)
# Load the government index dataset from a local CSV into `df_gov` and print
# its (rows, columns) shape.
# The commented-out lines are a disabled Google Drive download of the same
# file; they use a `drive` object that is not defined in the visible code.
#link='https://drive.google.com/file/d/13QYgFjQ3ztf_TxgK21CobYI3hCchAqFm/view?usp=sharing'
#id = link.split("/")[-2]
#downloaded = drive.CreateFile({'id':id})
#downloaded.GetContentFile('govt_index.csv')
df_gov = pd.read_csv('govt_index.csv')
print(df_gov.shape)
(137690, 51)
# Load a CSV into `df_vaccine` and print its (rows, columns) shape.
# NOTE(review): this reads 'govt_index.csv', the same path that is loaded into
# df_gov, and the printed shape is identical, so df_vaccine appears to be a
# duplicate of the government index data rather than a vaccine dataset. The
# intended file name is not visible here -- confirm and correct the path.
#link='https://drive.google.com/file/d/1m1DH6MDIf_A-2FaDASROgiPfyZZmAYMQ/view?usp=sharing'
#id = link.split("/")[-2]
#downloaded = drive.CreateFile({'id':id})
#downloaded.GetContentFile('govt_index.csv')
df_vaccine = pd.read_csv('govt_index.csv')
print(df_vaccine.shape)
(137690, 51)
# Preview the first five rows; the result is only displayed when this is the
# last expression of a notebook cell.
df.head()
| iso_code | continent | location | date | total_cases | new_cases | new_cases_smoothed | total_deaths | new_deaths | new_deaths_smoothed | ... | gdp_per_capita | extreme_poverty | cardiovasc_death_rate | diabetes_prevalence | female_smokers | male_smokers | handwashing_facilities | hospital_beds_per_thousand | life_expectancy | human_development_index | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | AFG | Asia | Afghanistan | 2020-02-24 | 1.0 | 1.0 | NaN | NaN | NaN | NaN | ... | 1803.987 | NaN | 597.029 | 9.59 | NaN | NaN | 37.746 | 0.5 | 64.83 | 0.511 |
| 1 | AFG | Asia | Afghanistan | 2020-02-25 | 1.0 | 0.0 | NaN | NaN | NaN | NaN | ... | 1803.987 | NaN | 597.029 | 9.59 | NaN | NaN | 37.746 | 0.5 | 64.83 | 0.511 |
| 2 | AFG | Asia | Afghanistan | 2020-02-26 | 1.0 | 0.0 | NaN | NaN | NaN | NaN | ... | 1803.987 | NaN | 597.029 | 9.59 | NaN | NaN | 37.746 | 0.5 | 64.83 | 0.511 |
| 3 | AFG | Asia | Afghanistan | 2020-02-27 | 1.0 | 0.0 | NaN | NaN | NaN | NaN | ... | 1803.987 | NaN | 597.029 | 9.59 | NaN | NaN | 37.746 | 0.5 | 64.83 | 0.511 |
| 4 | AFG | Asia | Afghanistan | 2020-02-28 | 1.0 | 0.0 | NaN | NaN | NaN | NaN | ... | 1803.987 | NaN | 597.029 | 9.59 | NaN | NaN | 37.746 | 0.5 | 64.83 | 0.511 |
5 rows × 59 columns
# Summarise missing data: overall shape, the 50 columns with the most missing
# values, and the percentage of all cells that are missing.
print("total size of data is = ",df.shape)
print()
missing_values_count = df.isnull().sum()
print("Feature with highest number of missing data is ,")
print(missing_values_count.nlargest(n=50))
print()
# df.size is rows * columns. It replaces np.product(df.shape), which was
# removed in NumPy 2.0.
print("% data which is missing = ",missing_values_count.sum()/df.size*100)
total size of data is = (83211, 59) Feature with highest number of missing data is , weekly_icu_admissions 82437 weekly_icu_admissions_per_million 82437 weekly_hosp_admissions 81938 weekly_hosp_admissions_per_million 81938 people_fully_vaccinated 77676 people_fully_vaccinated_per_hundred 77676 new_vaccinations 76192 people_vaccinated 75583 people_vaccinated_per_hundred 75583 total_vaccinations 74919 total_vaccinations_per_hundred 74919 icu_patients 74456 icu_patients_per_million 74456 hosp_patients 72642 hosp_patients_per_million 72642 new_vaccinations_smoothed 69837 new_vaccinations_smoothed_per_million 69837 total_tests 45575 total_tests_per_thousand 45575 new_tests 45299 new_tests_per_thousand 45299 handwashing_facilities 45059 tests_per_case 42048 positive_rate 41456 new_tests_smoothed 39780 new_tests_smoothed_per_thousand 39780 tests_units 38360 extreme_poverty 31887 male_smokers 24565 female_smokers 23686 reproduction_rate 15907 hospital_beds_per_thousand 13883 stringency_index 12418 total_deaths_per_million 11949 new_deaths_per_million 11791 total_deaths 11522 new_deaths 11364 aged_65_older 9069 aged_70_older 8632 median_age 8211 gdp_per_capita 7893 human_development_index 7378 cardiovasc_death_rate 7267 diabetes_prevalence 6205 population_density 5757 life_expectancy 4242 continent 4030 new_cases_smoothed_per_million 3368 new_deaths_smoothed_per_million 3368 new_cases_smoothed 2933 dtype: int64 % data which is missing = 40.3873632254862
# Keep only the rows that report weekly ICU admissions, then show the
# per-location mean of every numeric column for that subset.
temp_df = df[df['weekly_icu_admissions'].notna()]
print(temp_df.shape)
# numeric_only=True skips the text columns; without it pandas >= 2.0 raises a
# TypeError when it tries to average them.
print(temp_df.groupby(['location']).mean(numeric_only=True))
(774, 59)
total_cases new_cases new_cases_smoothed total_deaths \
location
Cyprus 1.740907e+04 181.682927 193.341439 104.951220
Czechia 4.396877e+05 2465.228070 4014.105228 7167.157895
Estonia 2.653748e+04 313.227273 357.500000 254.522727
France 1.572342e+06 14752.034483 13175.852207 45885.120690
Greece 7.410264e+04 527.525424 763.355881 2362.637931
Iceland 6.286000e+03 0.000000 4.000000 29.000000
Ireland 8.791754e+04 666.888889 633.341278 2336.074074
Israel 2.933627e+05 1541.745763 2026.731186 2336.070175
Latvia 3.812872e+04 283.230769 404.824154 696.500000
Lithuania 1.382320e+05 1169.592593 1203.798963 2071.333333
Malta 1.398769e+04 122.655172 132.581276 195.793103
Netherlands 4.165076e+05 3531.150000 3458.668237 8470.593220
Norway 3.047920e+04 179.533333 260.268847 341.879310
Slovenia 6.458516e+04 363.896552 571.564000 1300.758621
Spain 1.135766e+06 522.421875 7726.265286 38779.728814
Sweden 2.332423e+05 90.523810 2041.127016 6868.206897
new_deaths new_deaths_smoothed total_cases_per_million \
location
Cyprus 0.658537 0.989585 19875.662805
Czechia 54.368421 71.243088 41057.839754
Estonia 2.863636 3.201295 20005.048659
France 138.396552 248.455672 23072.559086
Greece 23.413793 22.910424 7109.493000
Iceland 0.000000 0.000000 18420.513000
Ireland 5.222222 12.261907 17805.015981
Israel 17.333333 15.338949 33893.057085
Latvia 7.000000 7.472564 20214.546462
Lithuania 18.296296 19.423333 50777.819111
Malta 1.620690 1.852103 31679.397897
Netherlands 25.677966 41.469797 24307.596783
Norway 0.362069 1.714390 5622.180333
Slovenia 9.275862 10.243914 31066.506810
Spain 62.728814 174.560079 24291.965266
Sweden 2.724138 31.265254 23094.961651
new_cases_per_million new_cases_smoothed_per_million \
location
Cyprus 207.424561 220.734878
Czechia 230.201930 374.835421
Estonia 236.123727 269.498250
France 216.471517 193.342586
Greece 50.611356 73.237288
Iceland 0.000000 11.722000
Ireland 135.057981 128.263944
Israel 178.122322 234.154254
Latvia 150.159179 214.624026
Lithuania 429.635370 442.200741
Malta 277.790138 300.270828
Netherlands 206.079700 201.849644
Norway 33.116667 48.009119
Slovenia 175.040155 274.931534
Spain 11.173641 165.250841
Sweden 8.963413 202.106333
total_deaths_per_million ... gdp_per_capita extreme_poverty \
location ...
Cyprus 119.821122 ... 32415.132 NaN
Czechia 669.266070 ... 32605.906 NaN
Estonia 191.869659 ... 29481.252 0.5
France 673.318810 ... 38605.671 NaN
Greece 226.674224 ... 24574.382 1.5
Iceland 84.982000 ... 46482.958 0.2
Ireland 473.100556 ... 67335.293 0.2
Israel 269.893018 ... 33132.320 0.5
Latvia 369.260500 ... 25063.846 0.7
Lithuania 760.878815 ... 29524.265 0.7
Malta 443.433310 ... 36513.323 0.2
Netherlands 494.348186 ... 48472.545 NaN
Norway 63.062845 ... 64800.057 0.2
Slovenia 625.685983 ... 31400.840 NaN
Spain 829.428085 ... 34272.360 1.0
Sweden 680.069621 ... 46949.283 0.5
cardiovasc_death_rate diabetes_prevalence female_smokers \
location
Cyprus 141.171 9.24 19.6
Czechia 227.485 6.82 30.5
Estonia 255.569 4.02 24.5
France 86.060 4.77 30.1
Greece 175.695 4.55 35.3
Iceland 117.992 5.31 14.3
Ireland 126.459 3.28 23.0
Israel 93.320 6.74 15.4
Latvia 350.060 4.91 25.6
Lithuania 342.989 3.67 21.3
Malta 168.711 8.83 20.9
Netherlands 109.361 5.29 24.4
Norway 114.316 5.31 19.6
Slovenia 153.493 7.25 20.1
Spain 99.403 7.17 27.4
Sweden 133.982 4.79 18.8
male_smokers handwashing_facilities hospital_beds_per_thousand \
location
Cyprus 52.7 NaN 3.400
Czechia 38.3 NaN 6.630
Estonia 39.3 NaN 4.690
France 35.6 NaN 5.980
Greece 52.0 NaN 4.210
Iceland 15.2 NaN 2.910
Ireland 25.7 NaN 2.960
Israel 35.4 NaN 2.990
Latvia 51.0 NaN 5.570
Lithuania 38.0 NaN 6.560
Malta 30.2 NaN 4.485
Netherlands 27.3 NaN 3.320
Norway 20.7 NaN 3.600
Slovenia 25.0 NaN 4.500
Spain 31.4 NaN 2.970
Sweden 18.9 NaN 2.220
life_expectancy human_development_index
location
Cyprus 80.98 0.887
Czechia 79.38 0.900
Estonia 78.74 0.892
France 82.66 0.901
Greece 82.24 0.888
Iceland 82.99 0.949
Ireland 82.30 0.955
Israel 82.97 0.919
Latvia 75.29 0.866
Lithuania 75.93 0.882
Malta 82.53 0.895
Netherlands 82.28 0.944
Norway 82.40 0.957
Slovenia 81.32 0.917
Spain 83.56 0.904
Sweden 82.80 0.945
[16 rows x 54 columns]
# INDIA
# Per-country series for India. Missing values in the count columns are
# replaced with 0.
df_india = df[df['location']=='India']
df_india_total_cases = df_india['total_cases'].fillna(0)
df_india_total_cases_per_million = df_india['total_cases_per_million'].fillna(0)
df_india_total_tests = df_india['total_tests'].fillna(0)
df_india_new_cases = df_india['new_cases'].fillna(0)
df_india_new_cases_per_million = df_india['new_cases_per_million'].fillna(0)
df_india_total_death = df_india['total_deaths'].fillna(0)
df_india_total_deaths_per_million = df_india['total_deaths_per_million'].fillna(0)
# handle outlier here
df_india_new_death = df_india['new_deaths'].fillna(0)
df_india_new_deaths_per_million = df_india['new_deaths_per_million'].fillna(0)

# Reproduction rate: every gap is filled with the first observed value, then
# the final entry is overwritten with the last observed value.
df_india_reproduction = df_india['reproduction_rate']
x = df_india_reproduction.dropna()
val = x.iloc[0]
val1 = x.iloc[-1]
df_india_reproduction = df_india_reproduction.fillna(val)
df_india_reproduction.iloc[-1] = val1

# Stringency index: back-fill gaps from the next observation; anything still
# missing (trailing gaps) gets the mean of the observed values.
# .bfill() replaces the deprecated fillna(method='bfill').
df_india_stringency = df_india['stringency_index']
df_india_stringency = df_india_stringency.bfill().fillna(df_india_stringency.dropna().mean())
plt.plot(df_india_stringency)
[<matplotlib.lines.Line2D at 0x1eb31ccf3a0>]
# ISRAEL
# Per-country series for Israel. Missing values in the count columns are
# replaced with 0.
df_israel = df[df['location']=='Israel']
df_israel_total_cases = df_israel['total_cases'].fillna(0)
df_israel_total_cases_per_million = df_israel['total_cases_per_million'].fillna(0)
df_israel_total_tests = df_israel['total_tests'].fillna(0)
df_israel_new_cases = df_israel['new_cases'].fillna(0)
# The filled series was previously assigned to a misspelled name
# (df_israela_...), which left this variable with its NaNs in place.
df_israel_new_cases_per_million = df_israel['new_cases_per_million'].fillna(0)
df_israel_total_death = df_israel['total_deaths'].fillna(0)
df_israel_total_deaths_per_million = df_israel['total_deaths_per_million'].fillna(0)
# handle outlier here
df_israel_new_death = df_israel['new_deaths'].fillna(0)
df_israel_new_deaths_per_million = df_israel['new_deaths_per_million'].fillna(0)

# Reproduction rate: back-fill gaps from the next observation; trailing gaps
# get the last observed value.
# .bfill() replaces the deprecated fillna(method='bfill').
df_israel_reproduction = df_israel['reproduction_rate']
x = df_israel_reproduction.dropna()
val1 = x.iloc[-1]
df_israel_reproduction = df_israel_reproduction.bfill().fillna(val1)

# Stringency index: back-fill gaps; anything still missing gets the mean of
# the observed values.
df_israel_stringency = df_israel['stringency_index']
df_israel_stringency = df_israel_stringency.bfill().fillna(df_israel_stringency.dropna().mean())
plt.plot(df_israel_stringency)
[<matplotlib.lines.Line2D at 0x1eb31daa730>]
# USA
# Per-country series for the United States. Missing values in the count
# columns are replaced with 0.
df_usa = df[df['location']=='United States']
df_usa_total_cases = df_usa['total_cases'].fillna(0)
df_usa_total_cases_per_million = df_usa['total_cases_per_million'].fillna(0)
df_usa_total_tests = df_usa['total_tests'].fillna(0)
df_usa_new_cases = df_usa['new_cases'].fillna(0)
df_usa_new_cases_per_million = df_usa['new_cases_per_million'].fillna(0)
df_usa_total_death = df_usa['total_deaths'].fillna(0)
df_usa_total_deaths_per_million = df_usa['total_deaths_per_million'].fillna(0)
df_usa_new_death = df_usa['new_deaths'].fillna(0)
df_usa_new_deaths_per_million = df_usa['new_deaths_per_million'].fillna(0)

# Reproduction rate: every gap is filled with the first observed value, then
# the final entry is overwritten with the last observed value.
df_usa_reproduction = df_usa['reproduction_rate']
x = df_usa_reproduction.dropna()
val = x.iloc[0]
val1 = x.iloc[-1]
df_usa_reproduction = df_usa_reproduction.fillna(val)
df_usa_reproduction.iloc[-1] = val1

# Stringency index: back-fill gaps from the next observation; anything still
# missing (trailing gaps) gets the mean of the observed values.
# .bfill() replaces the deprecated fillna(method='bfill').
df_usa_stringency = df_usa['stringency_index']
df_usa_stringency = df_usa_stringency.bfill().fillna(df_usa_stringency.dropna().mean())
plt.plot(df_usa_stringency)
[<matplotlib.lines.Line2D at 0x1eb31e12070>]
# Italy
# Per-country series for Italy. Missing values in the count columns are
# replaced with 0.
df_italy = df[df['location']=='Italy']
df_italy_total_cases = df_italy['total_cases'].fillna(0)
df_italy_total_cases_per_million = df_italy['total_cases_per_million'].fillna(0)
df_italy_total_tests = df_italy['total_tests'].fillna(0)
df_italy_new_cases = df_italy['new_cases'].fillna(0)
df_italy_new_cases_per_million = df_italy['new_cases_per_million'].fillna(0)
df_italy_total_death = df_italy['total_deaths'].fillna(0)
df_italy_total_deaths_per_million = df_italy['total_deaths_per_million'].fillna(0)
# Negative daily death values are clipped to 0 before the gaps are filled.
df_italy_new_death = df_italy['new_deaths'].clip(lower=0).fillna(0)
df_italy_new_deaths_per_million = df_italy['new_deaths_per_million'].clip(lower=0).fillna(0)

# Reproduction rate: every gap is filled with the first observed value, then
# the final entry is overwritten with the last observed value.
df_italy_reproduction = df_italy['reproduction_rate']
x = df_italy_reproduction.dropna()
val = x.iloc[0]
val1 = x.iloc[-1]
df_italy_reproduction = df_italy_reproduction.fillna(val)
df_italy_reproduction.iloc[-1] = val1

# Stringency index: back-fill gaps from the next observation; anything still
# missing (trailing gaps) gets the mean of the observed values.
# .bfill() replaces the deprecated fillna(method='bfill').
df_italy_stringency = df_italy['stringency_index']
df_italy_stringency = df_italy_stringency.bfill().fillna(df_italy_stringency.dropna().mean())
plt.plot(df_italy_stringency)
[<matplotlib.lines.Line2D at 0x1eb31e79670>]
# New Zealand
# Per-country series for New Zealand. Missing values in the count columns are
# replaced with 0.
df_nz = df[df['location']=='New Zealand']
df_nz_total_cases = df_nz['total_cases'].fillna(0)
df_nz_total_cases_per_million = df_nz['total_cases_per_million'].fillna(0)
df_nz_total_tests = df_nz['total_tests'].fillna(0)
df_nz_new_cases = df_nz['new_cases'].fillna(0)
df_nz_new_cases_per_million = df_nz['new_cases_per_million'].fillna(0)
df_nz_total_death = df_nz['total_deaths'].fillna(0)
df_nz_total_deaths_per_million = df_nz['total_deaths_per_million'].fillna(0)
# Negative daily death values are clipped to 0 after the gaps are filled.
df_nz_new_death = df_nz['new_deaths'].fillna(0).clip(lower=0)
df_nz_new_deaths_per_million = df_nz['new_deaths_per_million'].fillna(0)

# Reproduction rate: every gap is filled with the first observed value, then
# the final entry is overwritten with the last observed value.
df_nz_reproduction = df_nz['reproduction_rate']
x = df_nz_reproduction.dropna()
val = x.iloc[0]
val1 = x.iloc[-1]
df_nz_reproduction = df_nz_reproduction.fillna(val)
df_nz_reproduction.iloc[-1] = val1

# Stringency index: back-fill gaps from the next observation; anything still
# missing (trailing gaps) gets the mean of the observed values.
# .bfill() replaces the deprecated fillna(method='bfill').
df_nz_stringency = df_nz['stringency_index']
df_nz_stringency = df_nz_stringency.bfill().fillna(df_nz_stringency.dropna().mean())
plt.plot(df_nz_stringency)
[<matplotlib.lines.Line2D at 0x1eb31ed5550>]
# WORLD
# Aggregate series for the 'World' location. Missing values are replaced
# with 0.
df_w = df[df['location']=='World']
df_w_total_cases = df_w['total_cases'].fillna(0)
# Previously this filled an undefined name (df_w_total_death) and raised
# NameError, so none of the assignments below it ever ran.
df_w_total_deaths = df_w['total_deaths'].fillna(0)
df_w_total_tests = df_w['total_tests'].fillna(0)
df_w_new_cases = df_w['new_cases'].fillna(0)
# Built from the smoothed column itself, not from new_cases.
df_w_new_cases_smoothed = df_w['new_cases_smoothed'].fillna(0)
--------------------------------------------------------------------------- NameError Traceback (most recent call last) <ipython-input-14-8cd7ca68c457> in <module> 7 8 df_w_total_deaths = df_w['total_deaths'] ----> 9 df_w_total_deaths = df_w_total_death.fillna(0) 10 # plt.plot(df_israel_total_death) 11 NameError: name 'df_w_total_death' is not defined
# Overlay the stringency index of the five countries on one figure. Each
# series is converted to a plain array so it is drawn against its sample
# position rather than its DataFrame index.
plt.figure()
for series, tag in (
    (df_india_stringency, 'ind'),
    (df_israel_stringency, 'isr'),
    (df_usa_stringency, 'usa'),
    (df_italy_stringency, 'ita'),
    (df_nz_stringency, 'nz'),
):
    plt.plot(series.to_numpy(), label=tag)
plt.legend()
plt.grid()
import pycountry
import plotly.express as px
# Attach an ISO 3166-1 alpha-3 code to every row as column 'iso_alpha', using
# pycountry's fuzzy name search. Locations that cannot be resolved get ' '.
# NOTE: df1 is an alias of df, not a copy, so the new column is added to df
# as well.
df1 = df
list_countries = df1['location'].unique().tolist()
d_country_code = {}
for country in list_countries:
    try:
        country_code = pycountry.countries.search_fuzzy(country)[0].alpha_3
    except LookupError:
        # search_fuzzy raises LookupError when nothing matches.
        print('could not add ISO 3 code for ->', country)
        country_code = ' '
    d_country_code[country] = country_code
# One vectorised lookup instead of one boolean-mask assignment per country.
df1['iso_alpha'] = df1['location'].map(d_country_code)
could not add ISO 3 code for -> Asia could not add ISO 3 code for -> Cape Verde could not add ISO 3 code for -> Democratic Republic of Congo could not add ISO 3 code for -> Europe could not add ISO 3 code for -> European Union could not add ISO 3 code for -> Faeroe Islands could not add ISO 3 code for -> International could not add ISO 3 code for -> Laos could not add ISO 3 code for -> Micronesia (country) could not add ISO 3 code for -> North America could not add ISO 3 code for -> Northern Cyprus could not add ISO 3 code for -> Oceania could not add ISO 3 code for -> South America could not add ISO 3 code for -> South Korea could not add ISO 3 code for -> World
# Animated world map of total_cases: one frame per value of 'date', countries
# located by the 'iso_alpha' column, colour scale fixed to 0 .. 40,000,000.
fig = px.choropleth(
    df1,
    locations="iso_alpha",
    color="total_cases",
    hover_name="location",
    animation_frame="date",
    projection="natural earth",
    color_continuous_scale='Peach',
    range_color=[0, 40000000],
)
fig.show()
# Bar chart of total cases per million for the five countries, using the
# second-to-last sample of each series.
x = np.array(["India","USA","New Zealand","Italy","Israel"])
y = [
    series.values[-2]
    for series in (
        df_india_total_cases_per_million,
        df_usa_total_cases_per_million,
        df_nz_total_cases_per_million,
        df_italy_total_cases_per_million,
        df_israel_total_cases_per_million,
    )
]
# A single figure is created; the extra plt.figure(2) call only left an
# empty figure behind.
plt.figure(figsize=(18, 10))
ax = plt.subplot(111)
r2 = ax.bar(x, y, width=0.25, color='b', align='center')
# The grid flag is passed positionally: the `b=` keyword is no longer accepted
# by current Matplotlib.
plt.grid(True, alpha = 0.8, linewidth = 1)


def autolabel(rects):
    """Write each bar's height just above it on the module-level axes `ax`."""
    for rect in rects:
        height = rect.get_height()
        ax.text(rect.get_x() + rect.get_width()/2, 1+height,'%0.1f' % float(height),ha='center', va='bottom')


plt.title('Total Cases per Million Population', fontsize=30)
plt.ylabel('Count per million', fontsize=20)
plt.xlabel('Country', fontsize=20)
autolabel(r2)
plt.show()
<Figure size 432x288 with 0 Axes>
# Correlation of total cases, tests and deaths over the whole dataset:
# first Pearson (the pandas default), then Spearman.
plt.figure(figsize=(6, 4))
heatmap = sns.heatmap(
    df[['total_cases','total_tests','total_deaths']].corr(method='pearson'),
    vmin=0,
    vmax=1,
    annot=True,
)
heatmap.set_title('Pearson Correlation Heatmap', fontdict={'fontsize':12}, pad=12);

plt.figure(figsize=(6, 4))
corr = df[['total_cases','total_tests','total_deaths']].corr(method='spearman')
heatmap = sns.heatmap(corr, annot=True, vmax=1, vmin=0)
heatmap.set_title('Spearman Correlation Heatmap', fontdict={'fontsize':12}, pad=12);
# Bar chart of total tests divided by total cases for the five countries.
x = np.array(["India","USA","New Zealand","Italy","Israel"])
# Each country is sampled at its own offset from the end of the series.
# NOTE(review): the offsets presumably point at the last row with a reported
# total_tests value -- confirm against the data.
# Only the sampled row is divided, instead of dividing the whole series and
# then indexing.
y = [
    tests.values[idx] / cases.values[idx]
    for tests, cases, idx in (
        (df_india_total_tests, df_india_total_cases, -3),
        (df_usa_total_tests, df_usa_total_cases, -6),
        (df_nz_total_tests, df_nz_total_cases, -3),
        (df_italy_total_tests, df_italy_total_cases, -3),
        (df_israel_total_tests, df_israel_total_cases, -4),
    )
]
# A single figure is created; the extra plt.figure(2) call only left an
# empty figure behind.
plt.figure(figsize=(18, 10))
ax = plt.subplot(111)
r2 = ax.bar(x, y, width=0.25, color='b', align='center')
# The grid flag is passed positionally: the `b=` keyword is no longer accepted
# by current Matplotlib.
plt.grid(True, alpha = 0.8, linewidth = 1)


def autolabel(rects):
    """Write each bar's height just above it on the module-level axes `ax`."""
    for rect in rects:
        height = rect.get_height()
        ax.text(rect.get_x() + rect.get_width()/2, 1+height,'%0.1f' % float(height),ha='center', va='bottom')


plt.title('Test-Cases ratio', fontsize=30)
plt.ylabel('Ratio', fontsize=30)
plt.xlabel('Country', fontsize=30)
plt.xticks(fontsize= 20)
autolabel(r2)
plt.show()
<Figure size 432x288 with 0 Axes>
import plotly.express as px
# Bubble chart of total tests against total cases for the five countries.
# Bubble size and colour both encode the tests-per-case ratio.
x = np.array([
    df_india_total_tests.values[-3],
    df_usa_total_tests.values[-6],
    df_nz_total_tests.values[-3],
    df_italy_total_tests.values[-3],
    df_israel_total_tests.values[-4],
])
y = np.array([
    df_india_total_cases.values[-3],
    df_usa_total_cases.values[-6],
    df_nz_total_cases.values[-3],
    df_italy_total_cases.values[-3],
    df_israel_total_cases.values[-4],
])
# x and y hold the same rows the ratio was taken from, so the element-wise
# quotient gives the same five values.
tcr = x / y
dat = pd.DataFrame({
    'total_tests': x,
    'total_cases': y,
    'tcr': tcr,
    'location': ["India","USA","New Zealand","Italy","Israel"],
})
fig = px.scatter(
    dat,
    x="total_tests",
    y="total_cases",
    size="tcr",
    color="tcr",
    hover_name="location",
    size_max=70,
    text="location",
)
fig.update_traces(textposition='top center')
fig.show()
# Compare search-interest columns from searchCOVID.csv with case counts,
# globally and for each of the five countries.
search = pd.read_csv('searchCOVID.csv')

# Thin the daily case series by keeping only rows whose index label is a
# multiple of 7.
# NOTE(review): presumably meant to give roughly one sample per week so the
# series lines up with search['Week'] -- confirm the alignment.
world = df_w_total_cases[df_w_total_cases.index % 7 == 0]
df_india_total_cases = df_india_total_cases[df_india_total_cases.index % 7 == 0]
df_usa_total_cases = df_usa_total_cases[df_usa_total_cases.index % 7 == 0]
df_nz_total_cases = df_nz_total_cases[df_nz_total_cases.index % 7 == 0]
df_israel_total_cases = df_israel_total_cases[df_israel_total_cases.index % 7 == 0]
df_italy_total_cases = df_italy_total_cases[df_italy_total_cases.index % 7 == 0]


def _percent_of_peak(series):
    """Return *series* rescaled so that its maximum is 100, as a NumPy array."""
    return (series / series.max() * 100).to_numpy()


def _plot_search_interest(covid_col, vaccine_col, cases_pct, title,
                          cases_label='Total cases'):
    """Plot two search-interest columns and a case curve on one figure.

    Args:
        covid_col: column of `search` with searches for COVID (red line).
        vaccine_col: column of `search` with searches for the vaccine
            (green line).
        cases_pct: array of case counts rescaled to 0-100 (blue line), drawn
            against its own sample position.
        title: figure title.
        cases_label: legend entry for the case curve.
    """
    weeks = range(len(search['Week']))
    plt.figure(figsize=(18, 10))
    plt.plot(weeks, search[covid_col], color='r')
    plt.plot(weeks, search[vaccine_col], color='g')
    plt.plot(cases_pct, color='b')
    plt.legend(['Searches for COVID','Searches for its Vaccine', cases_label], loc=1, fontsize=15)
    # The grid flag is passed positionally: the `b=` keyword is no longer
    # accepted by current Matplotlib.
    plt.grid(True, alpha = 0.8, linewidth = 1)
    plt.title(title, fontsize=30)
    plt.ylabel('Value', fontsize=20)
    plt.xlabel('Week', fontsize=20)
    plt.show()


_plot_search_interest('covid', 'vaccine', _percent_of_peak(world),
                      'Google searches(Global)', cases_label='Total global cases')
_plot_search_interest('india_c', 'india_v', _percent_of_peak(df_india_total_cases),
                      'Google searches(India)')
_plot_search_interest('usa_c', 'usa_v', _percent_of_peak(df_usa_total_cases),
                      'Google searches(USA)')
_plot_search_interest('nz_c', 'nz_v', _percent_of_peak(df_nz_total_cases),
                      'Google searches(New Zealand)')
_plot_search_interest('italy_c', 'italy_v', _percent_of_peak(df_italy_total_cases),
                      'Google searches(Italy)')
# The Israel curve is drawn without its last two samples.
_plot_search_interest('israel_c', 'israel_v', _percent_of_peak(df_israel_total_cases)[0:-2],
                      'Google searches(Israel)')
import math
from statsmodels.tsa.arima_model import ARIMA
import statsmodels.api as sm
import statsmodels.tsa.api as smt
import statsmodels.formula.api as smf
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler
def model(treand, title='World COVID cases', look_back=3):
    """Fit an MLP on sliding windows of a series and plot fit against data.

    The series is min-max scaled, cut into windows of `look_back` consecutive
    values with the following value as target, and an MLPRegressor is trained
    and evaluated on all of those windows (there is no hold-out split).

    Args:
        treand: pandas Series of observations (read through `.values`).
        title: title of the comparison plot.
        look_back: number of consecutive values used as input features.

    Returns:
        Array of shape (n_windows, 1) with the predictions in the original
        (unscaled) units.

    Raises:
        ValueError: if the series has `look_back` or fewer observations.
    """
    from sklearn.neural_network import MLPRegressor

    scaler = MinMaxScaler()
    treand_s = scaler.fit_transform(treand.values.reshape(-1, 1))

    def create_dataset(dataset, look_back=look_back):
        # One window per position that still has a following value to predict.
        # range(len - look_back) also covers the final observation, which the
        # earlier "- 1" bound left out.
        n_windows = len(dataset) - look_back
        dataX = [dataset[i:i + look_back] for i in range(n_windows)]
        dataY = [dataset[i + look_back] for i in range(n_windows)]
        return np.array(dataX), np.array(dataY)

    x, y = create_dataset(treand_s)
    if len(x) == 0:
        raise ValueError('series is too short for look_back=%d' % look_back)
    # (n_windows, look_back, 1) -> (n_windows, look_back)
    x_train = x.reshape(x.shape[0], x.shape[1])
    # The regressor expects a 1-D target, not an (n, 1) column.
    y_train = y.ravel()

    clf = MLPRegressor(activation='tanh', solver='adam')
    clf.fit(x_train, y_train)
    train_pred = clf.predict(x_train)

    plt.figure(figsize=(18, 10))
    plt.plot(scaler.inverse_transform(train_pred.reshape(-1, 1)), label='Prediction')
    plt.plot(scaler.inverse_transform(y_train.reshape(-1, 1)), color='red', label='Original')
    plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
    plt.title(title)
    plt.show()
    return scaler.inverse_transform(train_pred.reshape(-1, 1))
# Fit and plot the model on the world series and keep the predictions in t1.
# NOTE(review): df_w_total_cases_smooth is not assigned anywhere in the
# visible code (the World section defines df_w_total_cases and
# df_w_new_cases_smoothed) -- confirm which series is intended.
t1 = model(df_w_total_cases_smooth)
from sklearn.preprocessing import MinMaxScaler
def model(treand, title='New Cases Globally', look_back=3, holdout=100):
    """Fit an MLP on the early part of a series and plot it over the whole.

    The series is min-max scaled and cut into windows of `look_back`
    consecutive values with the following value as target. The regressor is
    trained on all windows except the last `holdout`, then predicts every
    window, so the tail of the prediction curve is out-of-sample.

    Args:
        treand: pandas Series of observations (read through `.values`).
        title: title of the comparison plot.
        look_back: number of consecutive values used as input features.
        holdout: number of trailing windows excluded from training.

    Returns:
        Array of shape (n_windows, 1) with the predictions for every window
        in the original (unscaled) units.

    Raises:
        ValueError: if the series has `look_back` or fewer observations.
    """
    from sklearn.neural_network import MLPRegressor

    scaler = MinMaxScaler()
    treand_s = scaler.fit_transform(treand.values.reshape(-1, 1))

    def create_dataset(dataset, look_back=look_back):
        # One window per position that still has a following value to predict.
        # range(len - look_back) also covers the final observation, which the
        # earlier "- 1" bound left out.
        n_windows = len(dataset) - look_back
        dataX = [dataset[i:i + look_back] for i in range(n_windows)]
        dataY = [dataset[i + look_back] for i in range(n_windows)]
        return np.array(dataX), np.array(dataY)

    x, y = create_dataset(treand_s)
    if len(x) == 0:
        raise ValueError('series is too short for look_back=%d' % look_back)
    # (n_windows, look_back, 1) -> (n_windows, look_back)
    x = x.reshape(x.shape[0], x.shape[1])
    # The regressor expects a 1-D target, not an (n, 1) column.
    y = y.ravel()

    # NOTE(review): when there are `holdout` or fewer windows, n_train is <= 0
    # and the slices below are taken relative to the end of the array (or are
    # empty). The slicing is kept as it was -- confirm the intended split for
    # short series.
    n_train = len(x) - holdout
    x_train, y_train = x[0:n_train], y[0:n_train]
    x_test, y_test = x, y

    clf = MLPRegressor(activation='tanh', solver='adam')
    clf.fit(x_train, y_train)
    test_pred = clf.predict(x_test)

    plt.figure(figsize=(18, 10))
    plt.plot(scaler.inverse_transform(test_pred.reshape(-1, 1)), label='Prediction')
    # Plot the targets of the windows that were predicted (y_test), so both
    # curves cover the same range; y_train stopped at the training cut-off.
    plt.plot(scaler.inverse_transform(y_test.reshape(-1, 1)), color='red', label='Original')
    plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
    plt.title(title)
    plt.show()
    return scaler.inverse_transform(test_pred.reshape(-1, 1))
# Fit and plot the model on the India total-cases series and keep the
# predictions in t1.
t1 = model(df_india_total_cases)
# Repeat the scaling step at module level: the same series is min-max scaled
# as a single-column 2-D array.
treand = df_india_total_cases
scaler = MinMaxScaler()
treand_s = scaler.fit_transform(treand.values.reshape(-1,1))
def create_dataset(dataset, look_back=3):
    """Cut a sequence into (window, next value) training pairs.

    Args:
        dataset: indexable sequence of observations (list or array).
        look_back: number of consecutive values in each input window.

    Returns:
        Tuple (X, Y) of NumPy arrays: X[i] is dataset[i:i + look_back] and
        Y[i] is dataset[i + look_back]. Both are empty when the sequence has
        `look_back` or fewer elements.
    """
    # range(len - look_back) yields every window that still has a following
    # value; the earlier "- 1" bound dropped the final observation as a target.
    n_windows = len(dataset) - look_back
    dataX = [dataset[i:i + look_back] for i in range(n_windows)]
    dataY = [dataset[i + look_back] for i in range(n_windows)]
    return np.array(dataX), np.array(dataY)
# Cut the scaled series into (window, next value) pairs.
x, y = create_dataset(treand_s)
# Keep only the first two axes of x, i.e. (n_windows, look_back); treand_s is
# a single-column array, so each window carries a trailing axis of length 1.
x = x.reshape(x.shape[0], x.shape[1])
# Every window is used for training here (the slices cover the full arrays).
x_train = x[0:len(x)]
y_train = y[0:len(x)]